import pandas as pd  
from sklearn.preprocessing import StandardScaler, LabelEncoder  
from sklearn.cluster import KMeans  
from sklearn.metrics import silhouette_score  
import matplotlib.pyplot as plt  
from decision_company import read_csv_file, col_copy, create_standard_scaler, fit_transform_standard_scaler, create_label_encoder, fit_transform_label_encoder, get_dummies, create_kmeans, fit_predict_kmeans, get_silhouette_score, show_plots, create_figure, set_plot_title, set_yaxis_label, set_plot_xlabel, save_plot, plot, grid, col_assign_val
  
# Silhouette analysis for KMeans on selected credit-customer features:
# load -> encode categoricals -> standard-scale -> score each k -> plot.

# Load the dataset
credit_customers = read_csv_file("credit_customers.csv")

# Keep only the columns used for clustering
important_columns = ['credit_history', 'age', 'employment', 'credit_amount', 'savings_status']
data_for_clustering = col_copy(credit_customers, important_columns)

# Label-encode 'savings_status' and 'employment' (one encoder per column so
# each fitted mapping stays available afterwards).
# NOTE(review): presumably these helpers wrap sklearn's LabelEncoder, which
# assigns integer codes in sorted label order, not business order - confirm
# that this ordering is acceptable for distance-based clustering.
le_savings_status = create_label_encoder()
le_employment = create_label_encoder()

col_assign_val(
    data_for_clustering,
    'savings_status',
    fit_transform_label_encoder(le_savings_status, data_for_clustering['savings_status']),
)
col_assign_val(
    data_for_clustering,
    'employment',
    fit_transform_label_encoder(le_employment, data_for_clustering['employment']),
)

# One-hot encode 'credit_history'; drop_first=True drops one dummy level
data_for_clustering = get_dummies(data_for_clustering, columns=['credit_history'], drop_first=True)

# Normalize the data using standard scaling
scaler = create_standard_scaler()
data_for_clustering_scaled = fit_transform_standard_scaler(scaler, data_for_clustering)

# Candidate cluster counts. Defined once so the scoring loop and the plot's
# x-axis cannot drift apart. Starts at 2 because the silhouette score is not
# defined for a single cluster.
cluster_range = range(2, 15)

# Calculate the silhouette score for each candidate number of clusters
silhouette_scores = []

for cluster_num in cluster_range:
    kmeans = create_kmeans(n_clusters=cluster_num, random_state=42)  # fixed seed for reproducibility
    cluster_labels = fit_predict_kmeans(kmeans, data_for_clustering_scaled)
    silhouette_avg = get_silhouette_score(data_for_clustering_scaled, cluster_labels)
    silhouette_scores.append(silhouette_avg)

# Plot silhouette scores against the number of clusters
create_figure(figsize=(10, 6))
plot(cluster_range, silhouette_scores, marker='o', linestyle='--')
set_plot_title('Silhouette Scores for Different Number of Clusters')
set_plot_xlabel('Number of Clusters')
set_yaxis_label('Silhouette Score')
grid(True)
# Save before showing the figure, keeping the original call order.
save_plot('ref_result/Silhouette_Scores.png')
show_plots()